
# Read the donor-level records from fec_donors.csv.
keeps <- read.csv("fec_donors.csv")

# One table per indicator: the mean of the indicator within each report_year.
# Each table is re-keyed to "elec_year" and tagged with source "FEC" so it can
# later be stacked with the cash / voucher tables.

# iswhiteperson
white_tab_fec <- aggregate(iswhiteperson ~ report_year, data = keeps, FUN = mean)
colnames(white_tab_fec) <- c("elec_year", "iswhiteperson")
white_tab_fec[["source"]] <- "FEC"

# highinc
income_tab_fec <- aggregate(highinc ~ report_year, data = keeps, FUN = mean)
colnames(income_tab_fec) <- c("elec_year", "highinc")
income_tab_fec[["source"]] <- "FEC"

# highvote
votes_tab_fec <- aggregate(highvote ~ report_year, data = keeps, FUN = mean)
colnames(votes_tab_fec) <- c("elec_year", "highvote")
votes_tab_fec[["source"]] <- "FEC"

# isdem
dem_tab_fec <- aggregate(isdem ~ report_year, data = keeps, FUN = mean)
colnames(dem_tab_fec) <- c("elec_year", "isdem")
dem_tab_fec[["source"]] <- "FEC"


#############################
# build data for graphs
##########################


# Read css11_cv.csv and derive the indicator columns used by the tables and
# figures below.
css2 <- read.csv("css11_cv.csv")

# part: cash + vouchers, with a total of 2 capped at 1, then scaled by 100.
# NOTE(review): presumably cash and vouchers are 0/1 flags, making this
# "gave by either route" in percent units -- confirm against the data.
css2$part <- css2$cash + css2$vouchers
css2$part[css2$part == 2] <- 1
css2$part <- css2$part * 100

# Drop the first character of income and parse the rest as a number.
# substring(x, 2) runs to the end of each string; the previous code passed the
# number of rows as the stop position, which truncated values whenever the
# data had fewer rows than the strings had characters.
# NOTE(review): presumably the leading character is a currency symbol -- confirm.
css2$income <- as.numeric(substring(css2$income, 2))

# Income decile (1-10) from the empirical CDF; NA where income is NA.
css2$income_perc <- ceiling(ecdf(css2$income)(css2$income) * 10)

# voucher_yr: 1 for election years after 2015, else 0 (missing years stay 0).
css2$voucher_yr <- as.numeric(!is.na(css2$elec_year) & css2$elec_year > 2015)

# iswhiteperson: 1 when race is "European", 0 otherwise, NA when race is NA.
css2$iswhiteperson <- as.numeric(css2$race == "European")

# css_c (cash analyses) and css_v (voucher analyses) carry identical derived
# columns. They are computed once and copied, so the voucher frame can no
# longer be indexed with the cash frame's columns by mistake.
css_c <- css2

# highinc: 1 when income > 100000, 0 otherwise, NA when income is NA.
css_c$highinc <- as.numeric(css_c$income > 100000)

# highvote: 1 when sums > 13, 0 otherwise (a missing sums counts as 0).
css_c$highvote <- as.numeric(!is.na(css_c$sums) & css_c$sums > 13)

# isdem: 1 when party is "Democratic", 0 otherwise, NA when party is NA.
css_c$isdem <- as.numeric(css_c$party == "Democratic")

css_v <- css_c


# For every indicator, take its mean within each elec_year x giving-flag cell
# and keep only the cells where the flag equals 1. The *_c tables use the cash
# flag on css_c; the *_v tables use the vouchers flag on css_v.

# race (share with iswhiteperson == 1)
white_tab_c <- subset(
  aggregate(iswhiteperson ~ elec_year + cash, data = css_c, FUN = mean),
  cash == 1
)
white_tab_v <- subset(
  aggregate(iswhiteperson ~ elec_year + vouchers, data = css_v, FUN = mean),
  vouchers == 1
)

# income (share with highinc == 1, i.e. income above 100000)
income_tab_c <- subset(
  aggregate(highinc ~ elec_year + cash, data = css_c, FUN = mean),
  cash == 1
)
income_tab_v <- subset(
  aggregate(highinc ~ elec_year + vouchers, data = css_v, FUN = mean),
  vouchers == 1
)

# past elections voted (share with highvote == 1, i.e. sums above 13)
votes_tab_c <- subset(
  aggregate(highvote ~ elec_year + cash, data = css_c, FUN = mean),
  cash == 1
)
votes_tab_v <- subset(
  aggregate(highvote ~ elec_year + vouchers, data = css_v, FUN = mean),
  vouchers == 1
)

# party (share with isdem == 1)
dem_tab_c <- subset(
  aggregate(isdem ~ elec_year + cash, data = css_c, FUN = mean),
  cash == 1
)
dem_tab_v <- subset(
  aggregate(isdem ~ elec_year + vouchers, data = css_v, FUN = mean),
  vouchers == 1
)





#####################


# build graphs
# Label each table with its contribution source, make sure the FEC tables use
# the shared column names, then stack cash / voucher / FEC rows per indicator.

# race
white_tab_c[["source"]] <- "cash"
white_tab_v[["source"]] <- "voucher"
colnames(white_tab_fec) <- c("elec_year", "iswhiteperson", "source")
white_tab <- bind_rows(white_tab_c, white_tab_v, white_tab_fec)

# income
income_tab_c[["source"]] <- "cash"
income_tab_v[["source"]] <- "voucher"
colnames(income_tab_fec) <- c("elec_year", "highinc", "source")
income_tab <- bind_rows(income_tab_c, income_tab_v, income_tab_fec)

# party
dem_tab_c[["source"]] <- "cash"
dem_tab_v[["source"]] <- "voucher"
colnames(dem_tab_fec) <- c("elec_year", "isdem", "source")
dem_tab <- bind_rows(dem_tab_c, dem_tab_v, dem_tab_fec)

# voting history
votes_tab_c[["source"]] <- "cash"
votes_tab_v[["source"]] <- "voucher"
colnames(votes_tab_fec) <- c("elec_year", "highvote", "source")
votes_tab <- bind_rows(votes_tab_c, votes_tab_v, votes_tab_fec)

# Four line-chart panels (one per indicator) showing, by election year and
# source, the percentage of donors with the indicator set. Each panel also
# draws a dashed horizontal reference line computed from all rows of css_c.
# Every panel filters its OWN table to elec_year > 2010.

# x-axis tick positions shared by all four panels.
breaks1 <- c(2011, 2013, 2015, 2017, 2019, 2021)

# ---- % White ----
white_tab$iswhite <- white_tab$iswhiteperson * 100
# Share of css_c rows with iswhiteperson == 1. iswhiteperson is NA where race
# is missing, so NA rows are excluded; a plain sum() would make the reference
# line NA.
whitecount <- mean(css_c$iswhiteperson, na.rm = TRUE)
pt_whites <- ggplot(
  white_tab[white_tab$elec_year > 2010, ],
  aes(x = elec_year, y = iswhite, group = source)
) +
  geom_line(aes(col = source)) +
  theme_light() +
  geom_point(aes(col = source, shape = source)) +
  ggtitle("% White") +
  theme(legend.position = "none") +
  ylab("") +
  ylim(35, 100) +
  geom_hline(yintercept = whitecount * 100, linetype = "dashed") +
  xlab("") +
  scale_color_brewer(palette = "Set1") +
  scale_x_continuous(breaks = breaks1, minor_breaks = 1)
pt_whites

# ---- % >$100k household income ----
yint_inc <- mean(css_c$highinc, na.rm = TRUE)
income_tab$highinc <- income_tab$highinc * 100
pt_inc <- ggplot(
  income_tab[income_tab$elec_year > 2010, ],
  aes(x = elec_year, y = highinc, group = source)
) +
  geom_line(aes(col = source)) +
  theme_light() +
  geom_point(aes(col = source, shape = source)) +
  ggtitle("% >$100k Household Income") +
  theme(legend.position = "none") +
  ylab("") +
  ylim(35, 100) +
  geom_hline(yintercept = yint_inc * 100, linetype = "dashed") +
  xlab("") +
  scale_color_brewer(palette = "Set1") +
  scale_x_continuous(breaks = breaks1, minor_breaks = 1)
pt_inc

# ---- % Democrat ----
dem_tab$isdem <- dem_tab$isdem * 100
# demcount is consumed as demcount * 100 / NROW(css_c). isdem is NA where party
# is missing, so the known-party share is scaled to that denominator: rows with
# missing party neither turn the line into NA nor count as non-Democrats.
# Without NAs this equals sum(css_c$isdem).
demcount <- mean(css_c$isdem, na.rm = TRUE) * NROW(css_c)
pt_dem <- ggplot(
  dem_tab[dem_tab$elec_year > 2010, ],
  aes(x = elec_year, y = isdem, group = source)
) +
  geom_line(aes(col = source)) +
  theme_light() +
  geom_point(aes(col = source, shape = source)) +
  ggtitle("% Democrat") +
  theme(legend.position = "none") +
  ylab("") +
  ylim(35, 100) +
  geom_hline(yintercept = demcount * 100 / NROW(css_c), linetype = "dashed") +
  xlab("") +
  scale_color_brewer(palette = "Set1") +
  scale_x_continuous(breaks = breaks1, minor_breaks = 1)
pt_dem

# ---- % Frequent voters ----
votes_tab$highvote <- votes_tab$highvote * 100
# Percentage (already on the 0-100 scale) of css_c rows with highvote == 1.
votecount <- NROW(css_c$highvote[css_c$highvote == 1]) * 100 / NROW(css_c$highinc)
pt_votes <- ggplot(
  votes_tab[votes_tab$elec_year > 2010, ],
  aes(x = elec_year, y = highvote, group = source)
) +
  geom_line(aes(col = source)) +
  theme_light() +
  geom_point(aes(col = source, shape = source)) +
  ggtitle("% Frequent Voters") +
  theme(legend.position = "none") +
  ylab("") +
  ylim(25, 100) +
  geom_hline(yintercept = votecount, linetype = "dashed") +
  xlab("") +
  scale_color_brewer(palette = "Set1") +
  scale_x_continuous(breaks = breaks1, minor_breaks = 1)
pt_votes


# Relabel the sources for display and fix the legend order:
# Municipal cash, Federal cash, Voucher.
votes_tab$source <- factor(
  votes_tab$source,
  levels = c("cash", "FEC", "voucher"),
  labels = c("Municipal cash", "Federal cash", "Voucher")
)

# Capitalise only the legend column. Renaming by name (rather than overwriting
# all five names by position) cannot mislabel the other columns if the column
# order produced by bind_rows() ever changes.
names(votes_tab)[names(votes_tab) == "source"] <- "Source"

# Throwaway plot: built with a visible legend so the legend can be extracted
# and shared by the panel grid.
aa <- ggplot(votes_tab, aes(x = elec_year, y = highvote, group = Source)) +
  geom_line(aes(col = Source)) +
  theme_light() +
  geom_point(aes(col = Source, shape = Source)) +
  ggtitle("% Frequent Voters") +
  ylab("") +
  ylim(35, 90) +
  scale_color_brewer(palette = "Set1")

# Single-row legend placed at the bottom.
legend_b <- get_legend(
  aa +
    guides(color = guide_legend(nrow = 1)) +
    theme(legend.position = "bottom")
)

# Title object; note that it is not added to the grid assembled below.
title_theme <- ggdraw() +
  draw_label("Donor Demographics Over Time")

# 2 x 2 grid of panels with the shared legend underneath.
prow <- plot_grid(pt_whites, pt_inc, pt_dem, pt_votes, ncol = 2)
out <- plot_grid(prow, legend_b, ncol = 1, rel_heights = c(1, .1))
out # fig 2


#############################
#now with dollars
#############################


# Dollar-weighted versions of the demographic tables: for each year, the
# percentage of contributed dollars that came from donors whose indicator is 1.
#
# The shares are computed per year by name, not by pairing hard-coded row
# ranges of an aggregate() result, so a year that lacks one of the two groups
# (or a different number of years) can no longer silently misalign the rows.

# Percentage (0-100) of `amount`, within each `year`, contributed by rows whose
# `flag` equals 1.
#   data     data frame holding the three columns
#   amount   name of the dollar-amount column
#   year     name of the year column
#   flag     name of the 0/1 indicator column
#   pct_name name to give the resulting percentage column
#   source   label stored in the `source` column of the result
# Rows with NA in any of the three columns are dropped first. Returns a data
# frame with columns elec_year, <pct_name>, source, one row per year, sorted
# by year.
dollar_share_by_year <- function(data, amount, year, flag, pct_name, source) {
  complete <- stats::complete.cases(data[, c(amount, year, flag)])
  rows <- data[complete, ]
  by_year <- list(rows[[year]])
  total <- aggregate(rows[[amount]], by = by_year, FUN = sum)
  # Zeroing the amount on non-flagged rows keeps both aggregates on the same
  # set of years, in the same order.
  flagged <- aggregate(
    rows[[amount]] * (rows[[flag]] == 1),
    by = by_year,
    FUN = sum
  )
  out <- data.frame(
    elec_year = total[[1]],
    pct = 100 * flagged[[2]] / total[[2]],
    source = source,
    stringsAsFactors = FALSE
  )
  names(out)[names(out) == "pct"] <- pct_name
  out
}

# Voucher shares are only reported for voucher years (voucher_yr == 1, i.e.
# elec_year > 2015).
# NOTE(review): this replaces selecting rows 4:6 of the six election years --
# confirm these are the same three years in the data.
css_voucher_years <- css_v[which(css_v$voucher_yr == 1), ]

# Federal (FEC) contributions.
white_tab_fec <- dollar_share_by_year(
  keeps, "contribution_receipt_amount", "report_year",
  "iswhiteperson", "white_pct", "FEC"
)
highinc_tab_fec <- dollar_share_by_year(
  keeps, "contribution_receipt_amount", "report_year",
  "highinc", "highinc_pct", "FEC"
)
highvote_tab_fec <- dollar_share_by_year(
  keeps, "contribution_receipt_amount", "report_year",
  "highvote", "highvote_pct", "FEC"
)
isdem_tab_fec <- dollar_share_by_year(
  keeps, "contribution_receipt_amount", "report_year",
  "isdem", "isdem_pct", "FEC"
)

# Municipal voucher contributions.
white_tabv <- dollar_share_by_year(
  css_voucher_years, "vouchers_amt", "elec_year",
  "iswhiteperson", "white_pct", "voucher"
)
highinc_tabv <- dollar_share_by_year(
  css_voucher_years, "vouchers_amt", "elec_year",
  "highinc", "highinc_pct", "voucher"
)
highvote_tabv <- dollar_share_by_year(
  css_voucher_years, "vouchers_amt", "elec_year",
  "highvote", "highvote_pct", "voucher"
)
isdem_tabv <- dollar_share_by_year(
  css_voucher_years, "vouchers_amt", "elec_year",
  "isdem", "isdem_pct", "voucher"
)

# Municipal cash contributions, stacked with the voucher and FEC rows
# (row order: cash, voucher, FEC).
white_tab <- bind_rows(
  dollar_share_by_year(
    css_c, "cash_amt", "elec_year", "iswhiteperson", "white_pct", "cash"
  ),
  white_tabv,
  white_tab_fec
)
highinc_tab <- bind_rows(
  dollar_share_by_year(
    css_c, "cash_amt", "elec_year", "highinc", "highinc_pct", "cash"
  ),
  highinc_tabv,
  highinc_tab_fec
)
highvote_tab <- bind_rows(
  dollar_share_by_year(
    css_c, "cash_amt", "elec_year", "highvote", "highvote_pct", "cash"
  ),
  highvote_tabv,
  highvote_tab_fec
)
isdem_tab <- bind_rows(
  dollar_share_by_year(
    css_c, "cash_amt", "elec_year", "isdem", "isdem_pct", "cash"
  ),
  isdem_tabv,
  isdem_tab_fec
)

# Dollar-weighted panels. Each table is filtered on its own elec_year column;
# indexing with income_tab$elec_year (a different data frame) only works while
# the two frames happen to have the same number and order of rows.
# The dashed reference lines reuse whitecount, yint_inc, demcount and votecount.

pt_whites <- ggplot(
  white_tab[white_tab$elec_year > 2010, ],
  aes(x = elec_year, y = white_pct, group = source)
) +
  geom_line(aes(col = source)) +
  theme_light() +
  geom_point(aes(col = source, shape = source)) +
  ggtitle("% White") +
  theme(legend.position = "none") +
  ylab("") +
  ylim(35, 100) +
  geom_hline(yintercept = whitecount * 100, linetype = "dashed") +
  xlab("") +
  scale_color_brewer(palette = "Set1")
pt_whites

pt_inc <- ggplot(
  highinc_tab[highinc_tab$elec_year > 2010, ],
  aes(x = elec_year, y = highinc_pct, group = source)
) +
  geom_line(aes(col = source)) +
  theme_light() +
  geom_point(aes(col = source, shape = source)) +
  ggtitle("% >100k Income") +
  theme(legend.position = "none") +
  ylab("") +
  ylim(35, 100) +
  geom_hline(yintercept = yint_inc * 100, linetype = "dashed") +
  xlab("") +
  scale_color_brewer(palette = "Set1")
pt_inc

pt_dem <- ggplot(
  isdem_tab[isdem_tab$elec_year > 2010, ],
  aes(x = elec_year, y = isdem_pct, group = source)
) +
  geom_line(aes(col = source)) +
  theme_light() +
  geom_point(aes(col = source, shape = source)) +
  ggtitle("% Democrat") +
  theme(legend.position = "none") +
  ylab("") +
  ylim(35, 100) +
  geom_hline(yintercept = demcount * 100 / NROW(css_c), linetype = "dashed") +
  xlab("") +
  scale_color_brewer(palette = "Set1")
pt_dem

pt_votes <- ggplot(
  highvote_tab[highvote_tab$elec_year > 2010, ],
  aes(x = elec_year, y = highvote_pct, group = source)
) +
  geom_line(aes(col = source)) +
  theme_light() +
  geom_point(aes(col = source, shape = source)) +
  ggtitle("% Frequent Voters") +
  theme(legend.position = "none") +
  ylab("") +
  ylim(25, 100) +
  geom_hline(yintercept = votecount, linetype = "dashed") +
  xlab("") +
  scale_color_brewer(palette = "Set1")
pt_votes

# 2 x 2 grid of panels with the shared legend (legend_b) underneath.
prow <- plot_grid(pt_whites, pt_inc, pt_dem, pt_votes, ncol = 2)
out <- plot_grid(prow, legend_b, ncol = 1, rel_heights = c(1, .1))
out # fig a4

